###### Title  ##########################################################
## Candidate Qualifications and Out-Group Support:
## Evidence from Afghanistan by Jasmine Bhatia and
## Steve L. Monroe.
## Replication Command File for Creating Conjoint Dataset for Analysis 
## October 2024

# This file builds the long-format conjoint dataset (conjoint_clean.csv) used
# for the analysis of the conjoint survey experiment presented in the main article 


## Table of Contents ########
## 1. Upload Data
## 2. Format Data for Conjoint Analysis
## 3. Create Independent and Dependent Variables and relevant subsets
## 4. Export conjoint formatted data



## 1. Upload Data, Install Packages #####

# load necessary packages

library(cjoint)
library(dplyr)
library(tidyr)
library(tidyverse)
library(survey)
library(Rcpp)
library(estimatr)
library(knitr)
library(here)
library(cli)
library(devtools)
devtools::install_github("m-freitag/cjpowR")
library(cjpowR)
library("xtable")
library("base")


# Show the project root detected by the `here` package, then load the dataset.
# NOTE(review): the value returned by here() is not used; the file name below
# is a bare relative path, so read.csv() resolves it against the current
# working directory.

here()

data <- read.csv(file = "Afghan_Data_All.csv")


## 2. Format Data and Clean Data for Conjoint Analysis ####
##    Create treatment and control variables
##    Set baselines for Marginal Means (MM)
##    analysis, load cregg package


# Respondent-level covariates that accompany every candidate profile.
# Further down this vector supplies the column names for the respondent
# columns of the stacked data, so its entries are listed in the same order in
# which those columns are selected from the raw data. The household-head
# education entry uses the raw column name "HH_Head_Edu"; the former label
# "Head Education" matched no column and is not a syntactically valid R name.
covars <- c("Res_Gender", "Literacy", "HHhead",
            "Employ_Unem", "District", "HH_Head_Edu", "Res_Education",
            "Province", "Income", "Voted_Pres", "Res_Ethnicity")


# Column names of the candidate attributes for each of the six leadership
# profiles. The raw names follow the pattern "Rd_<round>_<A|B>_<attribute>",
# so each list is generated from the shared attribute suffixes.

profile_attributes <- c("Gender", "Education", "Age", "Ethnicity",
                        "Professional_Experience", "Place_of_Birth")

profile_1 <- paste0("Rd_1_A_", profile_attributes)
profile_2 <- paste0("Rd_1_B_", profile_attributes)
profile_3 <- paste0("Rd_2_A_", profile_attributes)
profile_4 <- paste0("Rd_2_B_", profile_attributes)
profile_5 <- paste0("Rd_3_A_", profile_attributes)
profile_6 <- paste0("Rd_3_B_", profile_attributes)


# Response columns for each pair of profiles: the choice column of the round
# followed by the rank columns of profiles A and B.

responses_1 <- c("RD1_Con_Table_Choice", paste0("RD_1_", c("A", "B"), "_Rank"))
responses_2 <- c("RD2_Con_Table_Choice", paste0("RD_2_", c("A", "B"), "_Rank"))
responses_3 <- c("RD3_Con_Table_Choice", paste0("RD_3_", c("A", "B"), "_Rank"))



# Split the wide survey data into one data frame per candidate profile.
#
# Each respondent row is copied six times (rounds 1-3, candidates A and B);
# every copy keeps the respondent ID, the six attributes of that candidate,
# the three response columns of that round and the respondent covariates.
#
# extract_profile() arguments:
#   df          wide data frame holding the raw survey columns
#   round       round number used in the raw column names (1, 2 or 3)
#   candidate   "A" or "B", the candidate whose attributes are extracted
#   profile_id  numeric profile identifier stored in the `profile` column
#   pair_label  label of the profile pair stored in the `Pair` column;
#               it is later combined with ID to build a profile-pair id
# Returns the selected columns plus `profile` and `Pair`, one row per row of
# `df`. The number of rows is taken from `df` instead of being hard-coded, so
# the script keeps working if the sample size changes.
extract_profile <- function(df, round, candidate, profile_id, pair_label) {
  attribute_cols <- paste0(
    "Rd_", round, "_", candidate, "_",
    c("Gender", "Education", "Age", "Ethnicity",
      "Professional_Experience", "Place_of_Birth")
  )
  response_cols <- c(
    paste0("RD", round, "_Con_Table_Choice"),
    paste0("RD_", round, "_", c("A", "B"), "_Rank")
  )
  respondent_cols <- c("Res_Gender", "Literacy", "HHhead",
                       "Employ_Unem", "District", "HH_Head_Edu",
                       "Res_Education", "Province", "Income",
                       "Voted_Pres", "Res_Ethnicity")

  out <- df[, c("ID", attribute_cols, response_cols, respondent_cols)]
  out$profile <- rep(profile_id, nrow(out))
  out$Pair <- rep(pair_label, nrow(out))
  out
}

# Round 1 (pair "A")
data2_1 <- extract_profile(data, round = 1, candidate = "A",
                           profile_id = 1, pair_label = "A")
data2_2 <- extract_profile(data, round = 1, candidate = "B",
                           profile_id = 2, pair_label = "A")

# Round 2 (pair "B")
data2_3 <- extract_profile(data, round = 2, candidate = "A",
                           profile_id = 3, pair_label = "B")
data2_4 <- extract_profile(data, round = 2, candidate = "B",
                           profile_id = 4, pair_label = "B")

# Round 3 (pair "C")
data2_5 <- extract_profile(data, round = 3, candidate = "A",
                           profile_id = 5, pair_label = "C")
data2_6 <- extract_profile(data, round = 3, candidate = "B",
                           profile_id = 6, pair_label = "C")

# Rename columns and rbind datasets back together
#
# All six per-profile data frames receive the same, round-neutral column
# names so that they can be stacked. str_replace() drops the first "_1" from
# the round-1 response names, giving "RD1_Con_Table_Choice" (unchanged, it
# contains no "_1"), "RD_A_Rank" and "RD_B_Rank".
neutral_names <- c("ID", "Gender", "Education", "Age", 
                   "Ethnicity", "Career", "POB",
                   str_replace(responses_1, "_1", ""), 
                   covars, "profile", "Pair")

# Guard against a silent mis-labelling: every data frame must have exactly
# one column per neutral name before the names are overwritten.
stopifnot(
  ncol(data2_1) == length(neutral_names),
  ncol(data2_2) == length(neutral_names),
  ncol(data2_3) == length(neutral_names),
  ncol(data2_4) == length(neutral_names),
  ncol(data2_5) == length(neutral_names),
  ncol(data2_6) == length(neutral_names)
)

names(data2_1) <- neutral_names
names(data2_2) <- neutral_names
names(data2_3) <- neutral_names
names(data2_4) <- neutral_names
names(data2_5) <- neutral_names
names(data2_6) <- neutral_names


data2 <- data.frame(rbind(data2_1, data2_2, 
                          data2_3, data2_4, 
                          data2_5, data2_6 
))

# Add a unique row identifier (derived from the actual number of stacked rows
# rather than a hard-coded count)
data2$num <- seq_len(nrow(data2))

## Create Pair Variable
# Respondent ID followed by the pair label, e.g. ID 12 and pair "A" -> "12A"

data2$PairID <- paste0(data2$ID, data2$Pair)


### 3. Create Dependent and Independent Variables ####


## Put variables in correct format for cjoint package functions

# From here on the stacked, profile-level data is simply called `data`.
data <- data2 


## Dependent Variables

# Profiles 1, 3 and 5 are the "A" candidates of their round and profiles
# 2, 4 and 6 the "B" candidates.
shown_as_a <- data$profile %in% c(1, 3, 5)
shown_as_b <- data$profile %in% c(2, 4, 6)

# Consolidate rating variables: each row takes the rank given to its own
# candidate (A rank for A profiles, B rank for B profiles).
data$rating <- case_when(
  shown_as_a ~ data$RD_A_Rank,
  shown_as_b ~ data$RD_B_Rank
)


# Note: NAs must be coded as zero for cjoint functions to work
# NOTE(review): as written, a missing value in RD1_Con_Table_Choice produces
# NA in choice_dummy rather than 0 -- confirm whether missing choices occur in
# the data and whether they should be recoded.

# After the renaming above, RD1_Con_Table_Choice holds the choice of the row's
# own round; the dummy is 1 when the chosen option (1 or 2) matches the row's
# candidate (A or B) and 0 otherwise.
picked_a <- data$RD1_Con_Table_Choice == 1
picked_b <- data$RD1_Con_Table_Choice == 2

data$choice_dummy <- ifelse(
  (shown_as_a & picked_a) | (shown_as_b & picked_b),
  1, 0
)


data$Choice <- data$choice_dummy
data$Rating <- data$rating

## Independent Variables

# Hazara vs. Non Hazara Candidates

# Hazara: Profile Attribute ("Hazara" when the candidate's ethnicity is
# Hazara, "Not Hazara" for every other non-missing ethnicity)
data$Hazara <- ifelse(data$Ethnicity != "Hazara", "Not Hazara", "Hazara")

# Whether the respondent is Hazara (respondent ethnicity code 5): 1/0 dummy

data$Res_Hazara <- as.numeric(data$Res_Ethnicity == 5)

# Hazara vs. Non Hazara Male and Female Candidate
# Four-way label crossing candidate ethnicity group with candidate gender;
# the final branch is the catch-all "Non-Hazara Male Leader".

cand_hazara <- data$Hazara == "Hazara"
cand_not_hazara <- data$Hazara == "Not Hazara"
cand_female <- data$Gender == "Female"
cand_male <- data$Gender == "Male"

data$Hazara_Gender_Leader <-
  ifelse(cand_hazara & cand_female, "Hazara Female Leader",
  ifelse(cand_not_hazara & cand_female, "Non-Hazara Female Leader",
  ifelse(cand_hazara & cand_male, "Hazara Male Leader",
         "Non-Hazara Male Leader")))


# Qualifications Variables for candidates

degree_home <- data$Education == "University Degree in Afghanistan"
degree_abroad <- data$Education == "University Degree Abroad"

# "High" for any university degree (earned in Afghanistan or abroad)
data$high_educated <- ifelse(degree_home | degree_abroad, "High", "Low")

# "High" only for a university degree earned abroad
data$high_educated_western <- ifelse(degree_abroad, "High", "Low")

# Madrassa education versus any other education level
data$madrassa_educated <- ifelse(data$Education == "Madrassa",
                                 "Madrassa", "Other")


# Qualifications and gender variable
# Four-way label crossing candidate education (High/Low) with candidate
# gender; the final branch is the catch-all "Low Educated Male Leader".

leader_high <- data$high_educated == "High"
leader_low <- data$high_educated == "Low"
leader_female <- data$Gender == "Female"
leader_male <- data$Gender == "Male"

data$Education_Gender_Leader <-
  ifelse(leader_high & leader_female, "High Educated Female Leader",
  ifelse(leader_low & leader_female, "Low Educated Female Leader",
  ifelse(leader_high & leader_male, "High Educated Male Leader",
         "Low Educated Male Leader")))
                                        
# Qualifications and gender connection with respondent 
#
# Shared building blocks for the labels below. Respondent gender is compared
# against the codes 2 (labelled female respondent) and 1 (labelled male
# respondent).

edu_high <- data$high_educated == "High"
edu_low <- data$high_educated == "Low"
west_high <- data$high_educated_western == "High"
west_low <- data$high_educated_western == "Low"
from_madrassa <- data$madrassa_educated == "Madrassa"
not_madrassa <- data$madrassa_educated == "Other"
degree_home <- data$Education == "University Degree in Afghanistan"
degree_abroad <- data$Education == "University Degree Abroad"
leader_female <- data$Gender == "Female"
leader_male <- data$Gender == "Male"
res_female <- data$Res_Gender == 2
res_male <- data$Res_Gender == 1

# Candidate education (High/Low) by respondent gender; NA when no branch
# matches.
data$Education_Gender_Res_Connection <-
  ifelse(edu_high & res_female, "High Educated Leader Female Respondent",
  ifelse(edu_low & res_female, "Low Educated Leader Female Respondent",
  ifelse(edu_high & res_male, "High Educated Leader Male Respondent",
  ifelse(edu_low & res_male, "Low Educated Leader Male Respondent",
         NA))))

# Candidate degree abroad (yes/no) by candidate gender; NA when no branch
# matches.
data$Western_Education_Gender_Leader <-
  ifelse(west_high & leader_female, "Western Educated Female Leader",
  ifelse(west_low & leader_female, "Non Western Educated Female Leader",
  ifelse(west_high & leader_male, "Western Educated Male Leader",
  ifelse(west_low & leader_male, "Non Western Educated Male Leader",
         NA))))

# Six-way version separating degrees earned in Afghanistan from degrees
# earned abroad; everything else is "Low Educated" by gender, with
# "Low Educated Male Leader" as the catch-all.
data$Western_Education_Gender_Leader_2 <-
  ifelse(degree_home & leader_female, "High Educated Female Leader (Afghanistan)",
  ifelse(degree_home & leader_male, "High Educated Male Leader (Afghanistan)",
  ifelse(degree_abroad & leader_female, "High Educated Female Leader (Western)",
  ifelse(degree_abroad & leader_male, "High Educated Male Leader (Western)",
  ifelse(leader_female, "Low Educated Female Leader",
         "Low Educated Male Leader")))))


# Madrassa education by candidate gender; the final branch is the catch-all.
data$Madrassa_Gender_Leader <-
  ifelse(from_madrassa & leader_female, "Madrassa Educated Female Leader",
  ifelse(not_madrassa & leader_female, "Non Madrassa Educated Female Leader",
  ifelse(from_madrassa & leader_male, "Madrassa Educated Male Leader",
         "Non Madrassa Male Leader")))



# Qualifications and connections with respondent 
# Candidate degree abroad (yes/no) by respondent gender; NA when no branch
# matches.

data$Western_Education_Gender_Res_Connection <-
  ifelse(west_high & res_female, "Western Educated Leader Female Respondent",
  ifelse(west_low & res_female, "Non Western Educated Leader Female Respondent",
  ifelse(west_high & res_male, "Western Educated Leader Male Respondent",
  ifelse(west_low & res_male, "Non Western Educated Leader Male Respondent",
         NA))))




# Candidate education crossed with candidate Hazara status.
is_hazara <- data$Hazara == "Hazara"
is_not_hazara <- data$Hazara == "Not Hazara"
edu_high <- data$high_educated == "High"
edu_low <- data$high_educated == "Low"
degree_home <- data$Education == "University Degree in Afghanistan"
degree_abroad <- data$Education == "University Degree Abroad"

# Four-way label (High/Low education x Hazara/Non-Hazara); the final branch
# is the catch-all "Low Educated Non-Hazara Leader".
data$Education_Hazara_Leader <-
  ifelse(edu_high & is_hazara, "High Educated Hazara Leader",
  ifelse(edu_low & is_hazara, "Low Educated Hazara Leader",
  ifelse(edu_high & is_not_hazara, "High Educated Non-Hazara Leader",
         "Low Educated Non-Hazara Leader")))


# Six-way label separating degrees earned in Afghanistan from degrees earned
# abroad; everything else is "Low Educated", with
# "Low Educated Hazara Leader" as the catch-all.
data$Western_Education_Hazara_Leader_2 <-
  ifelse(degree_home & is_hazara, "High Educated Hazara Leader (Afghanistan)",
  ifelse(degree_home & is_not_hazara, "High Educated Non-Hazara Leader (Afghanistan)",
  ifelse(degree_abroad & is_hazara, "High Educated Hazara Leader (Western)",
  ifelse(degree_abroad & is_not_hazara, "High Educated Non-Hazara Leader (Western)",
  ifelse(is_not_hazara, "Low Educated Non-Hazara Leader",
         "Low Educated Hazara Leader")))))

# Madrassa education crossed with candidate Hazara status.
# The first three branches cover madrassa/Hazara, non-madrassa/Hazara and
# madrassa/non-Hazara, so the remaining rows are non-madrassa, non-Hazara
# candidates. They were previously labelled "Non Madrassa Hazara Leader",
# which mislabels the group; the label now follows the naming of the other
# three levels. Any downstream code that refers to the old level name must
# use "Non Madrassa Educated Non-Hazara Leader" instead.
data$Madrassa_Hazara_Leader <- ifelse(data$madrassa_educated == "Madrassa" & data$Hazara == "Hazara", "Madrassa Educated Hazara Leader",
                                ifelse(data$madrassa_educated == "Other" & data$Hazara == "Hazara", "Non Madrassa Educated Hazara Leader",       
                                 ifelse(data$madrassa_educated == "Madrassa" & data$Hazara == "Not Hazara", "Madrassa Educated Non-Hazara Leader",
                                   "Non Madrassa Educated Non-Hazara Leader")))

# Interaction of Gender, Qualification and Hazara Group status
#
# Crosses the ethnicity-by-gender label with the education-by-gender label
# into eight categories; the final branch is the catch-all
# "Low Educated Non-Hazara Male Leader".

hazara_woman <- data$Hazara_Gender_Leader == "Hazara Female Leader"
hazara_man <- data$Hazara_Gender_Leader == "Hazara Male Leader"
other_woman <- data$Hazara_Gender_Leader == "Non-Hazara Female Leader"
other_man <- data$Hazara_Gender_Leader == "Non-Hazara Male Leader"

high_woman <- data$Education_Gender_Leader == "High Educated Female Leader"
high_man <- data$Education_Gender_Leader == "High Educated Male Leader"
low_woman <- data$Education_Gender_Leader == "Low Educated Female Leader"
low_man <- data$Education_Gender_Leader == "Low Educated Male Leader"

data$Hazara_Education_Gender_Leader <-
  ifelse(hazara_woman & high_woman, "High Educated Hazara Female Leader",
  ifelse(hazara_man & high_man, "High Educated Hazara Male Leader",
  ifelse(hazara_woman & low_woman, "Low Educated Hazara Female Leader",
  ifelse(hazara_man & low_man, "Low Educated Hazara Male Leader",
  ifelse(other_woman & high_woman, "High Educated Non-Hazara Female Leader",
  ifelse(other_man & high_man, "High Educated Non-Hazara Male Leader",
  ifelse(other_woman & low_woman, "Low Educated Non-Hazara Female Leader",
         "Low Educated Non-Hazara Male Leader")))))))

# Repeat coding with other ethnic groups candidate qualifications
#
# For each group three columns are created, in this order:
#   Res_<Group>               1/0 dummy from the respondent ethnicity code
#   <Group>                   "<Group>" / "Not <Group>" candidate attribute
#   Education_<Group>_Leader  candidate education crossed with <Group>

# Four-way education x group label.
#   education_level  vector of "High" / "Low"
#   group_flag       vector of "<group>" / "Not <group>"
#   group            group name used inside the labels
# The final branch is the catch-all "Low Educated Non-<group> Leader".
label_education_by_group <- function(education_level, group_flag, group) {
  in_group <- group_flag == group
  out_group <- group_flag == paste("Not", group)
  ifelse(education_level == "High" & in_group,
         paste("High Educated", group, "Leader"),
  ifelse(education_level == "Low" & in_group,
         paste("Low Educated", group, "Leader"),
  ifelse(education_level == "High" & out_group,
         paste0("High Educated Non-", group, " Leader"),
         paste0("Low Educated Non-", group, " Leader"))))
}

# Pashtun (respondent code 1; candidate ethnicity value "Pashto")

data$Res_Pashtun <- ifelse(data$Res_Ethnicity == 1, 1, 0)

data$Pashtun <- ifelse(data$Ethnicity == "Pashto", "Pashtun", "Not Pashtun")

data$Education_Pashtun_Leader <-
  label_education_by_group(data$high_educated, data$Pashtun, "Pashtun")

# Tajik (respondent code 2)

data$Res_Tajik <- ifelse(data$Res_Ethnicity == 2, 1, 0)

data$Tajik <- ifelse(data$Ethnicity == "Tajik", "Tajik", "Not Tajik")

data$Education_Tajik_Leader <-
  label_education_by_group(data$high_educated, data$Tajik, "Tajik")

# Turkmen (respondent code 4)

data$Res_Turkmen <- ifelse(data$Res_Ethnicity == 4, 1, 0)

data$Turkmen <- ifelse(data$Ethnicity == "Turkmen", "Turkmen", "Not Turkmen")

data$Education_Turkmen_Leader <-
  label_education_by_group(data$high_educated, data$Turkmen, "Turkmen")

# Uzbek (respondent code 3)

data$Res_Uzbek <- ifelse(data$Res_Ethnicity == 3, 1, 0)

data$Uzbek <- ifelse(data$Ethnicity == "Uzbek", "Uzbek", "Not Uzbek")

data$Education_Uzbek_Leader <-
  label_education_by_group(data$high_educated, data$Uzbek, "Uzbek")



## Respondent Characteristics that analysis will subset 

# variable equals one if respondent has some uni edu
# (education codes 5 and 6), zero for any other non-missing code

has_university <- data$Res_Education == 5 | data$Res_Education == 6
data$Res_University <- ifelse(has_university, 1, 0)



## Respondent income
# high income equals one if respondent discloses earning at least 100,000 a year
# (income codes 7 and 8), zero for any other non-missing code

is_high_income <- data$Income == 7 | data$Income == 8
data$Res_Highincome <- ifelse(is_high_income, 1, 0)


#### 4. Load Cregg Package, subset variables and export data ####


# Load cregg package for MM analysis
# install.packages("cregg")
library("cregg") 
# note: cregg masks the amce function from cjoint;
# detach cregg or call cjoint::amce explicitly
# before running any AMCE analysis with cjoint

# Set baselines for MM analysis
# Named list mapping an attribute name to its reference level.
# NOTE(review): `baselines` is not used again in this file, and the stacked
# data has no column called "Military" (the corresponding column appears to
# be "Career") -- confirm against the analysis scripts.
baselines <- list(
  Gender = "Male",
  Education = "Educated to High School",
  Age = "28",
  Military = "Civilian",
  POB = "Balkh"
)

# subset relevant variables
# Columns kept for the exported dataset, in export order.

export_cols <- c(
  "Choice", "Rating",
  "Hazara", "Res_Hazara", "Hazara_Gender_Leader", "Education_Hazara_Leader",
  "high_educated", "Gender", "Res_Gender", "ID",
  "high_educated_western", "madrassa_educated",
  "Education_Gender_Leader", "Education_Gender_Res_Connection",
  "Western_Education_Gender_Leader", "Western_Education_Gender_Res_Connection",
  "Hazara_Education_Gender_Leader",
  "Western_Education_Gender_Leader_2", "Western_Education_Hazara_Leader_2",
  "Madrassa_Gender_Leader", "Madrassa_Hazara_Leader",
  "Res_Pashtun", "Pashtun", "Education_Pashtun_Leader",
  "Res_Tajik", "Tajik", "Education_Tajik_Leader",
  "Res_Turkmen", "Turkmen", "Education_Turkmen_Leader",
  "Res_Uzbek", "Uzbek", "Education_Uzbek_Leader",
  "Res_University", "Res_Highincome",
  "Ethnicity", "profile", "Pair", "Age", "Career", "POB"
)

data2 <- data[, export_cols]
                  

                
# Assign variables as factors or integers

# Integer copy of the respondent ID (the original ID column is kept as is)
data2$ID2 <- as.integer(data2$ID)

# Recode respondent gender to text labels before converting it to a factor:
# code 2 becomes "Female Respondent", every other non-missing code
# "Male Respondent".
data2$Res_Gender <- ifelse(data2$Res_Gender == 2, "Female Respondent", 
                    "Male Respondent")

# Columns stored as factors. Education_Gender_Res_Connection is included so
# that all derived label columns are treated consistently (it was previously
# the only one left as character). The CSV written below is the same either
# way, because write.csv() writes factor and character columns identically.
factor_cols <- c(
  "Gender", "Hazara", "Res_Gender", "Res_Hazara",
  "Hazara_Gender_Leader", "Education_Hazara_Leader",
  "high_educated", "high_educated_western", "madrassa_educated",
  "Education_Gender_Leader", "Education_Gender_Res_Connection",
  "Western_Education_Gender_Leader", "Western_Education_Gender_Res_Connection",
  "Hazara_Education_Gender_Leader",
  "Western_Education_Gender_Leader_2", "Western_Education_Hazara_Leader_2",
  "Madrassa_Gender_Leader", "Madrassa_Hazara_Leader",
  "Res_Pashtun", "Pashtun", "Education_Pashtun_Leader",
  "Res_Tajik", "Tajik", "Education_Tajik_Leader",
  "Res_Turkmen", "Turkmen", "Education_Turkmen_Leader",
  "Res_Uzbek", "Uzbek", "Education_Uzbek_Leader",
  "Res_University", "Res_Highincome",
  "Ethnicity", "Age", "POB", "Career"
)

# Convert in place; assigning to existing columns keeps the column order.
data2[factor_cols] <- lapply(data2[factor_cols], as.factor)

# Export the cleaned conjoint dataset (relative to the working directory)
write.csv(data2, "conjoint_clean.csv", row.names = FALSE)




